%% Set paths:

clear;clc;close all
%% Table OA.1. Chi-Square Goodness of Fit Test of the Distribution of Assets
data_orig           = readtable('bhc_and_commercial_hh_min10_branches_for_matlab_nobranchconstraints.csv');
data_orig.z         = data_orig.assets./data_orig.brnumber_hh; % Create additional variables
amin = 1;
amax = 250;
cond        = data_orig.assets >= amin & data_orig.assets <= amax & (data_orig.post == 0 | data_orig.post ==2); %& data_orig.datequarter_num <= 2017.5;
data        = data_orig(cond,:);
data.year   = floor(data.datequarter_num);
apre  = data(data.post == 0 ,:).assets;
apost = data(data.post == 2,:).assets;

%  uniform, normal, lognormal, exponential, gamma, extreme value�
%  
%Pre DF:
betaq = 1.05;
qmin = 1;
qmax = Inf;
c = (betaq-1)/(qmin^(1-betaq));
cdf_power     = @(x,b) ((b-1)/(qmin^(1-b)))./(1-b) * ( x.^(1-b) - qmin ^(1-b));
%cdf_power_cond = @(x,b) (((b-1)/(qmin^(1-b)))./(1-b) * ( x.^(1-b) - qmin ^(1-b))) /((cdf_power(qmax,b)));

% F_X|a <x<b = F(x) - F(a) /(F(b) - F(a))
pdf_power = @(x,b) (b-1)./(qmin.^(1-b)) .* x.^(-b);
%pdf_power_cond = @(x,b) (b-1)./(qmin.^(1-b)) .* x.^(-b) /(cdf_power(qmax,b));


y = 1;

number_of_dist = 11;
years_list = min(data(data.post == 0,:).datequarter_num):0.25:max(data(data.post==0,:).datequarter_num);
pvals     = nan(number_of_dist,length(years_list));
stats_mat = nan(number_of_dist,length(years_list));
names = {};
number_of_bins  = 20;
for y= 1:length(years_list)
    
    apre = data(data.post == 0 & data.datequarter_num == years_list(y) ,:).assets;
    loglik = @(b) -sum(log(pdf_power(apre,b))); % Fit beta:
    [param, fval] = fminsearch(loglik,2);

    i = 1;
    names{i}       = 'Power Law';
    [~,p,stats]  = chi2gof(apre,'cdf',@(x) cdf_power(x,param),'NBins', number_of_bins);
    pvals(i,y)     = p;
    stats_mat(i,y) = stats.chi2stat;
    
    i = i+1;
    names{i}       = 'Uniform';
    [~,p,stats] = chi2gof(apre,'CDF',@(x) (x - amin)/(amax-amin),'NBins', number_of_bins);
    pvals(i,y)     = p;
    stats_mat(i,y) = stats.chi2stat;
    
    i = i+1;
    names{i}       = 'Weibull';
    pd = fitdist(apre,names{i});
    [~,p,stats] = chi2gof(apre,'CDF',pd,'NBins', number_of_bins);
    pvals(i,y)     = p;
    stats_mat(i,y) = stats.chi2stat;
    
      i = i+1;
    names{i}       = 'Normal';
    pd = fitdist(apre,names{i});
    [~,p,stats] = chi2gof(apre,'CDF',pd,'NBins', number_of_bins);
    pvals(i,y)     = p;
    stats_mat(i,y) = stats.chi2stat;
    
    i = i+1;
    names{i}       = 'Lognormal';
    pd = fitdist(apre,names{i});
    [~,p,stats] = chi2gof(apre,'CDF',pd,'NBins', number_of_bins);
    pvals(i,y)     = p;
    stats_mat(i,y) = stats.chi2stat;
    
   
    i = i+1;
    names{i}       = 'Exponential';
    pd = fitdist(apre,names{i});
    [~,p,stats] = chi2gof(apre,'CDF',pd,'NBins', number_of_bins);
    pvals(i,y)     = p;
    stats_mat(i,y) = stats.chi2stat;
    
     
    i = i+1;
    names{i}       = 'Extreme Value';
    pd = fitdist(apre,'ExtremeValue');
    [~,p,stats] = chi2gof(apre,'CDF',pd,'NBins', number_of_bins);
    pvals(i,y)     = p;
    stats_mat(i,y) = stats.chi2stat;
    
 
    
    i = i+1;
    names{i}       = 'Gamma';
    pd = fitdist(apre,names{i});
    [~,p,stats] = chi2gof(apre,'CDF',pd,'NBins', number_of_bins);
    pvals(i,y)     = p;
    stats_mat(i,y) = stats.chi2stat;
    
end

pval_vec = mean(pvals,2);
stat_vec = mean(stats_mat,2);

% Export table
FID = fopen(fullfile('distribution_goodness_of_fit.tex'), 'w');
fprintf(FID, '\\begin{tabular*}{\\linewidth}{@{\\extracolsep{\\fill}}lll@{}} ');
fprintf(FID, '\\toprule ');
fprintf(FID, 'Distribution  & P-val   & Chi-Square Stat. \\\\ \\midrule ');
for i = 1:length(names)
    mypval = pval_vec(i);
    mystat = stat_vec(i);
    if mypval < 0.001
        mypval = '\\textless 0.001';
    end
    fprintf(FID, concatenate_text(2,{' ',names{i},'  &' , mypval ,'  & ',mystat,'             \\\\ '}));
end
fprintf(FID, ' \\bottomrule  \\end{tabular*} ');


